{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bae6647-1695-4136-be4a-416c63dccdb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import netCDF4 as nc\n",
    "import numpy as np\n",
    "from scipy.spatial import cKDTree\n",
    "import os\n",
    "import h5py\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm.notebook import tqdm \n",
    "\n",
    "# ============================================================\n",
    "# 1. Configuration & Parameters\n",
    "# ============================================================\n",
    "\n",
    "# Case labels to process form the half-open range START_INDEX <= label < END_INDEX\n",
    "# (the driver loop iterates range(START_INDEX, END_INDEX)). Labels run 0~2999, so\n",
    "# these defaults cover every case; narrow the range to process a subset.\n",
    "START_INDEX = 0\n",
    "END_INDEX = 3000\n",
    "\n",
    "# Directory (fill these in before running)\n",
    "BASE_PATH = ''  # Base Path\n",
    "SOURCE_DIR = os.path.join(BASE_PATH, '') # source_data\n",
    "TARGET_DIR = os.path.join(BASE_PATH, '') # uniform_data\n",
    "OUTPUT_ROOT = ''  # Result path\n",
    "\n",
    "# Two target-grid nodes are connected by an edge when their distance is at most\n",
    "# CONNECTION_RADIUS. NOTE(review): 0.091429 is presumably the nominal spacing of\n",
    "# the uniform target grid and SAFETY_BUFFER a tolerance on top of it - confirm.\n",
    "SAFETY_BUFFER = 0.045\n",
    "CONNECTION_RADIUS = 0.091429 + SAFETY_BUFFER\n",
    "\n",
    "# Output directory: one sub-folder per kind of result file\n",
    "os.makedirs(os.path.join(OUTPUT_ROOT, 'coords'), exist_ok=True)\n",
    "os.makedirs(os.path.join(OUTPUT_ROOT, 'adjLists'), exist_ok=True)\n",
    "os.makedirs(os.path.join(OUTPUT_ROOT, 'flow'), exist_ok=True)\n",
    "\n",
    "print(f\"Set processing range: {START_INDEX} to {END_INDEX}\")\n",
    "print(f\"Output directory: {OUTPUT_ROOT}\")\n",
    "\n",
    "# Label ranges per Reynolds number: 0~999 -> Re=3000, 1000~1999 -> Re=4000, 2000~2999 -> Re=5000\n",
    "def get_reynolds_number(label):\n",
    "    \"\"\"Return the Reynolds number that a case label belongs to.\n",
    "\n",
    "    Labels below 1000 map to 3000, labels from 1000 up to (but excluding) 2000\n",
    "    map to 4000, and every other label maps to 5000.\n",
    "    \"\"\"\n",
    "    # (exclusive upper label bound, Reynolds number), checked in ascending order\n",
    "    for upper_bound, reynolds in ((1000, 3000), (2000, 4000)):\n",
    "        if label < upper_bound:\n",
    "            return reynolds\n",
    "    return 5000\n",
    "\n",
    "def process_single_case(label, source_dir, target_dir, output_root, connection_radius=None):\n",
    "    \"\"\"Convert one simulation case into graph-ready HDF5 files.\n",
    "\n",
    "    The mean-flow variables of the source grid are transferred onto the target\n",
    "    grid by nearest-neighbour lookup, target nodes within the connection radius\n",
    "    are linked by directed edges in both directions, and the target coordinates\n",
    "    are min-max normalised. Three files are written below ``output_root``:\n",
    "    ``flow/flow_{label}.hdf5``, ``adjLists/adjLst_{label}.hdf5`` and\n",
    "    ``coords/coord_{label}.hdf5``, each holding one array under the key 'dataset'.\n",
    "\n",
    "    Args:\n",
    "        label: Case label; selects the Reynolds-number sub-folder and the file names.\n",
    "        source_dir: Directory containing the grid_Re*/Info_Re*/Mean_Re* folders.\n",
    "        target_dir: Directory containing the Info_uniform_Re* folders.\n",
    "        output_root: Directory whose coords/, adjLists/ and flow/ sub-folders\n",
    "            must already exist.\n",
    "        connection_radius: Maximum node distance for an edge. ``None`` (default)\n",
    "            uses the module-level CONNECTION_RADIUS.\n",
    "\n",
    "    Returns:\n",
    "        True when the three output files were written; False when the mean-flow\n",
    "        source file is missing and the case was skipped.\n",
    "    \"\"\"\n",
    "    if connection_radius is None:\n",
    "        connection_radius = CONNECTION_RADIUS\n",
    "\n",
    "    # The sub-folder of every input file depends on the case's Reynolds number.\n",
    "    Re_num = get_reynolds_number(label)\n",
    "    path_grid_source = os.path.join(source_dir, f'grid_Re{Re_num}/grid_{label}.Netcdf')\n",
    "    path_info_source = os.path.join(source_dir, f'Info_Re{Re_num}/Info_{label}.Netcdf')\n",
    "    path_mean_source = os.path.join(source_dir, f'Mean_Re{Re_num}/Mean_s1_02000001-02500000_{label}.Netcdf')\n",
    "    path_info_target = os.path.join(target_dir, f'Info_uniform_Re{Re_num}/Info_uniform_{label}.Netcdf')\n",
    "\n",
    "    if not os.path.exists(path_mean_source):\n",
    "        print(f\"[Skip] Source file not found: {path_mean_source}\")\n",
    "        return False\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 2. Source Data Loading\n",
    "    # -------------------------------------------------\n",
    "    with nc.Dataset(path_info_source, 'r') as src_info, \\\n",
    "         nc.Dataset(path_grid_source, 'r') as src_grid, \\\n",
    "         nc.Dataset(path_mean_source, 'r') as src_mean:\n",
    "\n",
    "        src_x = src_info.variables['coord_0'][:]\n",
    "        src_y = src_info.variables['coord_1'][:]\n",
    "\n",
    "        # Only cells whose cellInfo flag equals 0 are kept as mapping sources.\n",
    "        cell_info = src_grid.variables['cellInfo'][:]\n",
    "        valid_mask = (cell_info == 0)\n",
    "\n",
    "        src_coords_valid = np.column_stack([src_x[valid_mask], src_y[valid_mask]])\n",
    "        u_src = src_mean.variables['variables0'][:][valid_mask]\n",
    "        v_src = src_mean.variables['variables1'][:][valid_mask]\n",
    "        rho_src = src_mean.variables['variables2'][:][valid_mask]\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 3. Target Grid Loading\n",
    "    # -------------------------------------------------\n",
    "    # Fall back to a flat Info_{label}.Netcdf inside target_dir when the\n",
    "    # per-Reynolds uniform file is absent.\n",
    "    if not os.path.exists(path_info_target):\n",
    "        path_info_target = os.path.join(target_dir, f'Info_{label}.Netcdf')\n",
    "\n",
    "    with nc.Dataset(path_info_target, 'r') as tgt_info:\n",
    "        tgt_x = tgt_info.variables['coord_0'][:]\n",
    "        tgt_y = tgt_info.variables['coord_1'][:]\n",
    "        target_coords = np.column_stack([tgt_x, tgt_y])\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 4. cKDTree\n",
    "    # -------------------------------------------------\n",
    "    tree_mapping = cKDTree(src_coords_valid)  # nearest-source lookup\n",
    "    tree_graph = cKDTree(target_coords)       # neighbour search on the target grid\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 5. Data Processing: Mapping (Flow Variables)\n",
    "    # -------------------------------------------------\n",
    "    # Each target node takes the values of its single nearest valid source cell.\n",
    "    _, indices = tree_mapping.query(target_coords, k=1)\n",
    "\n",
    "    u_mapped = u_src[indices].reshape(-1, 1)\n",
    "    v_mapped = v_src[indices].reshape(-1, 1)\n",
    "    rho_mapped = rho_src[indices].reshape(-1, 1)\n",
    "\n",
    "    flow_data = np.hstack([u_mapped, v_mapped, rho_mapped])\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 6. Data Processing: Graph Generation (Adjacency)\n",
    "    # -------------------------------------------------\n",
    "    # query_pairs yields each unordered pair (i < j) once; asking for an ndarray\n",
    "    # avoids a Python-level loop. The reshape keeps the array 2-D even when no\n",
    "    # pair lies within the radius, so the saved dataset always has shape (E, 2).\n",
    "    pairs = tree_graph.query_pairs(r=connection_radius, output_type='ndarray').reshape(-1, 2)\n",
    "\n",
    "    # Interleave every pair with its reverse: [i, j], [j, i], ... The order of\n",
    "    # the distinct pairs themselves carries no meaning.\n",
    "    adj_array = np.stack([pairs, pairs[:, ::-1]], axis=1).reshape(-1, 2).astype(np.int32)\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 7. Data Processing: Normalization (Features)\n",
    "    # -------------------------------------------------\n",
    "    # Min-Max Normalization\n",
    "    max_x, min_x = np.max(tgt_x), np.min(tgt_x)\n",
    "    max_y, min_y = np.max(tgt_y), np.min(tgt_y)\n",
    "\n",
    "    x_norm = (tgt_x - min_x) / (max_x - min_x)\n",
    "    y_norm = (tgt_y - min_y) / (max_y - min_y)\n",
    "\n",
    "    x_norm = x_norm.reshape(-1, 1)\n",
    "    y_norm = y_norm.reshape(-1, 1)\n",
    "\n",
    "    # Re Number Normalization: 3000 -> 0.0, 4000 -> 0.5, 5000 -> 1.0\n",
    "    re_norm_val = (Re_num - 3000) / (5000 - 3000)\n",
    "    re_norm_array = np.full_like(x_norm, re_norm_val)\n",
    "\n",
    "    coord_data = np.hstack([x_norm, y_norm, re_norm_array])\n",
    "\n",
    "    # -------------------------------------------------\n",
    "    # 8. Save to HDF5\n",
    "    # -------------------------------------------------\n",
    "    with h5py.File(os.path.join(output_root, f'flow/flow_{label}.hdf5'), 'w') as f:\n",
    "        f.create_dataset('dataset', data=flow_data)\n",
    "\n",
    "    with h5py.File(os.path.join(output_root, f'adjLists/adjLst_{label}.hdf5'), 'w') as f:\n",
    "        f.create_dataset('dataset', data=adj_array)\n",
    "\n",
    "    with h5py.File(os.path.join(output_root, f'coords/coord_{label}.hdf5'), 'w') as f:\n",
    "        f.create_dataset('dataset', data=coord_data)\n",
    "\n",
    "    return True\n",
    "\n",
    "# Run every label in [START_INDEX, END_INDEX); a failing case is reported and\n",
    "# skipped so that one bad file does not abort the whole batch.\n",
    "success_count = 0\n",
    "for label in tqdm(range(START_INDEX, END_INDEX), desc=\"Processing Cases\"):\n",
    "    try:\n",
    "        succeeded = process_single_case(label, SOURCE_DIR, TARGET_DIR, OUTPUT_ROOT)\n",
    "    except Exception as e:\n",
    "        print(f\"Error processing label {label}: {e}\")\n",
    "        continue\n",
    "    if succeeded:\n",
    "        success_count += 1\n",
    "\n",
    "print(f\"\\nDone! Successfully processed {success_count} cases.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
